home-code

示例❤源码

<?php
namespace imcat;

// Pull in the project include that the rest of this page relies on.
require '../incs/func.php';

// Content of the sample page celm.htm (located next to this script), as
// returned by comFiles::get(); it is the input of the 'elm' demo below.
$data = comFiles::get(__DIR__.'/celm.htm');

// Demo selector taken from the request. 'elm' is handed to req() as the
// second argument -- presumably the fallback when 'act' is absent
// (NOTE(review): confirm against req()'s signature).
$act = req('act', 'elm');

?>

<!DOCTYPE html><html><head>
<?php glbHtml::page('init'); ?>
</head>
<body>

<nav>
    <a href="?act=pq1">extQuery采集1</a>
    <a href="?act=pq2">extQuery采集2</a>
    <a href="?act=elm">元素分离</a>
</nav>    

<?php if($act=='pq2'){ ?>
<h3>采集页面2</h3>

<?php

// ---------------------------------------------------------------------
// Demo "pq2": scrape listing pages through extQuery::pqa() and dump one
// row per list item.
// NOTE(review): the meaning of the second array element (30) passed to
// pqa() is not visible from this file -- confirm before changing it.
// ---------------------------------------------------------------------

// Source 1: haofang.net listing.
$url2 = 'https://dg.haofang.net/ershoufang/p2.html';
$lists = extQuery::pqa([$url2,30],'.list-content li.block');
foreach ($lists as $li) {
    // Start from an empty row so no key can leak in from a previous item.
    $row = [];
    $tmp = pq($li)->find('div.title a');
    $row['url'] = pq($tmp)->attr('href');
    $row['title'] = pq($tmp)->text();
    $row['thumb'] = pq($li)->find('img:first')->attr('src');
    $row['area'] = pq($li)->find('.address')->text();
    $row['price'] = pq($li)->find('.total-price')->find('span')->text();
    $row['punit'] = pq($li)->find('.unit-price')->text();
    dump($row); //echo implode(', ', $row)."<br>\n";
}

// Source 2: loupan.com listing, with simple timing around the call to
// pqa() ($timer1 -> $timer2) and around the extraction loop
// ($timer2 -> $timer3).
$timer1 = microtime(true);
$url2 = 'http://hezhou.loupan.com/xinfang/p1/';
$lists = extQuery::pqa([$url2,30],'.list-house li.item');
$timer2 = microtime(true);
foreach ($lists as $li) {
    // Reset the row: without this the 'punit' key of the last item of
    // source 1 would still be present in every row dumped here.
    $row = [];
    $row['url'] = pq($li)->find('a:first')->attr('href');
    $img = pq($li)->find('img:first');
    $thumb = pq($img)->attr('data-src');
    // Blank the thumbnail when it points at the "nopic" placeholder.
    // Compare against false explicitly: strpos() returns 0 (falsy) for a
    // match at the very start of the string.
    $row['thumb'] = strpos((string)$thumb, 'images/nopic.') !== false ? '' : $thumb;
    $row['title'] = pq($img)->attr('alt');
    $row['area'] = pq($li)->find('.address')->find('span')->text();
    $row['price'] = pq($li)->find('.price')->text();
    dump($row); //echo implode(', ', $row)."<br>\n"; die();
}

$timer3 = microtime(true);
$tmp = $timer2 - $timer1; dump($tmp); // seconds spent in the pqa() call
$tmp = $timer3 - $timer2; dump($tmp); // seconds spent in the loop

// Source 3: an inline HTML string instead of a URL.
$data = '
<body>
<div class="topCity-content"> 
<a href="http://gz.fzg360.com">广州</a>
<a href="http://sz.fzg360.com">深圳</a>
<a href="http://dg.fzg360.com">东莞</a>
</div>
</body>
';

$lists = extQuery::pqa($data,'a',0);
foreach ($lists as $li) {
    echo pq($li)->text()."<br>\n";
}

echo '-end-1'; //die('xx');

// Source 4: a document object created up front and then handed to pqa().
$doc = extQuery::newDocumentFile('http://m.gz.fzg360.com/index/citylist.html');
$lists = extQuery::pqa($doc,'span');
foreach ($lists as $li) {
    echo pq($li)->text()."<br>\n";
}
#dump($doc);
?>

<?php } if($act=='pq1'){ ?>
<h3>采集页面1</h3>

<?php

// ---------------------------------------------------------------------
// Demo "pq1": load two documents side by side and query each one by its
// document ID, passed as the second argument of pq().
// ---------------------------------------------------------------------

$url1 = 'http://hezhou.loupan.com/xinfang/p1/';
$url2 = 'http://hezhou.loupan.com/xinfang/p2/';
$doc1 = extQuery::newDocumentFile($url1);
$doc2 = extQuery::newDocumentFile($url2);
$did1 = $doc1->getDocumentID(); echo "$did1<br>\n";
$did2 = $doc2->getDocumentID(); echo "$did2<br>\n";

echo "<pre>\n";

// Document 1: collect one row per list item, then print the whole set.
#phpQuery::selectDocument($doc1);
echo "<hr>\n";
$lists = [];
$lis1 = pq('.list-house li.item', $did1);
foreach ($lis1 as $li) {
    $row = [];
    $row['url'] = pq($li)->find('a:first')->attr('href');
    $img = pq($li)->find('img:first');
    $thumb = pq($img)->attr('data-src');
    // Blank the thumbnail when it points at the "nopic" placeholder.
    // Compare against false explicitly: strpos() returns 0 (falsy) for a
    // match at the very start of the string.
    $row['thumb'] = strpos((string)$thumb, 'images/nopic.') !== false ? '' : $thumb;
    $row['title'] = pq($img)->attr('alt');
    $row['area'] = pq($li)->find('.address')->text();
    $row['price'] = pq($li)->find('.price')->text();
    $lists[] = $row;
}
print_r($lists);

// Document 2: print only the <h2> text of each list item.
#phpQuery::selectDocument($doc2);
echo "<hr>\n";
$lis2 = pq('.list-house li.item', $did2);
foreach ($lis2 as $li) {
    $tmp = pq($li)->find('h2')->text();
    echo "$tmp<br>\n";
}

echo "<hr>\n";
#var_dump($doc1);

?>

<?php } if($act=='elm'){ ?>
<h3>分离html元素</h3>

<li>采集目标页:<a href="./celm.htm" target='_celm'>celm.htm</a></li>

<?php

// ---------------------------------------------------------------------
// Demo "elm": run the basElm extraction helpers against $data (the
// content of celm.htm loaded at the top of this script) and echo each
// result under a labelled <hr>.
// The '(*)' token inside the patterns is passed to basElm verbatim; its
// exact matching semantics are defined by basElm, not in this file.
// ---------------------------------------------------------------------

// By tag name.
$val = basElm::getVal($data,'title');
echo "\n\n<hr>title-val:\n$val\n";
$val = basElm::getPos($data,'title');
echo "\n\n<hr>title-pos:\n$val\n";

// Between two attribute markers.
$val = basElm::getVal($data,'id="link"(*)id="test"','->');
echo "\n\n<hr>val:\n$val>>>\n";
$val = basElm::getPos($data,'id="link"(*)id="test"');
echo "\n\n<hr>pos:\n$val>>>\n";

// Between an opening tag and a closing tag / marker.
$val = basElm::getVal($data,'<div class="content">(*)</div>');
echo "\n\n<hr>val2:\n$val>>>\n";
$val = basElm::getPos($data,'<div class="content">(*)id="link"');
echo "\n\n<hr>pos2:\n$val>>>\n";
$val = basElm::getPos($data,'<div class="content">(*)</div>');
echo "\n\n<hr>pos3:\n$val>>>\n";

// Markers named "xnon..." -- presumably absent from celm.htm to show the
// no-match result (NOTE(review): confirm against the sample page).
$val = basElm::getPos($data,'id="xnon15"(*)id="xnon32"');
echo "\n\n<hr>pos4:\n$val>>>\n";

// Multiple matches.
$arr = basElm::getArr($data,'<li class(*)</li>');
echo "\n\n<hr>getArr:\n"; print_r($arr); echo "\n";
$arr = basElm::getPreg($data,'<li class="cls1">(*)</li>');
echo "\n\n<hr>getPreg:\n"; print_r($arr); echo "\n";

// Attribute extraction.
$arr = basElm::getAttr($data,'target','key');
echo "\n\n<hr>getArr-a:\n"; print_r($arr); echo "\n";
$val = basElm::getAttr($data,'target','key',1);
echo "\n\n<hr>getAttr-no:\n$val\n";
$arr = basElm::getAttr($data,'noattr','key');
echo "\n\n<hr>getArr-a:\n"; print_r($arr); echo "\n";

$val = basElm::getAttr($data,'witdh','key',0);
echo "\n\n<hr>getAttr-witdh:\n$val\n";
$arr = basElm::getAttr($data,'href','url');
echo "\n\n<hr>getArr-urls:\n"; print_r($arr); echo "\n";

} // closes the if($act=='elm') opened in the preceding PHP island

?>

</body>
</html>

-End-